# -*- coding:utf-8 -*-

'''
data form

车辆的状态分为四类:

unacc (Unacceptable 状况很差)
acc (Acceptable 状况一般)
good (Good 状况好)
vgood (Very good 状况非常好)

buying (购买价: vhigh, high, med, low)
maint (维护价: vhigh, high, med, low)
doors (几个门: 2, 3, 4, 5more)
persons (载人量: 2, 4, more)
lug_boot (贮存空间: small, med, big)
safety (安全性: low, med, high)

buying  maint   doors   persons  lug_boot  safety  condition
vhigh   vhigh   2       2        small     low     unacc
vhigh   vhigh   2       2        small     med     unacc
vhigh   vhigh   2       2        small     high    unacc

'''


import os
from urllib.request import urlretrieve

import pandas as pd
data_dir = "../data/car/car.csv"

'''
single data form:  vhigh,vhigh,2,2,small,med,unacc
[[1,0,0,0],
[1,0,0,0],
[1,0,0,0],
[1,0,0],
[1,0,0],
[0,1,0],
[1,0,0,0]
]
buying (购买价: vhigh, high, med, low)
maint (维护价: vhigh, high, med, low)
doors (几个门: 2, 3, 4, 5more)
persons (载人量: 2, 4, more)
lug_boot (贮存空间: small, med, big)
safety (安全性: low, med, high)
车辆的状态分为四类:
unacc (Unacceptable 状况很差)
acc (Acceptable 状况一般)
good (Good 状况好)
vgood (Very good 状况非常好)
'''


def download_data(down=True):
    """Load the UCI Car Evaluation dataset as a pandas DataFrame.

    Args:
        down: When true, first fetch ``car.data`` from the UCI archive and
            save it to ``data_dir``, overwriting any existing file there.
            When false, only the local copy at ``data_dir`` is read.

    Returns:
        A DataFrame whose columns are named buying, maint, doors, persons,
        lug_boot, safety and class, in that order.
    """
    if down:
        # urlretrieve does not create missing directories, so make sure the
        # destination folder exists before downloading.
        target_dir = os.path.dirname(data_dir)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Save to data_dir rather than a duplicated literal so the download
        # target and the path read below cannot drift apart.
        urlretrieve(
            "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data",
            data_dir,
        )

    # Passing names= makes pandas read the file as header-less and label the
    # columns with these names.
    col_names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
    return pd.read_csv(data_dir, names=col_names)

def convert2onehot(data):
    """Return a one-hot (dummy-variable) encoding of ``data``.

    Each encoded column is replaced by indicator columns named
    ``<column>_<value>``, using the frame's own column labels as prefixes.

    Args:
        data: DataFrame to encode.

    Returns:
        A new DataFrame produced by ``pandas.get_dummies``.
    """
    column_prefixes = data.columns
    encoded = pd.get_dummies(data, prefix=column_prefixes)
    return encoded

# with open(data_dir) as f:
#     lines = f.readlines()
#     print(lines[1])
#     for line in lines:
#         pass

# if __name__ == '__main__':
#     data = download_data(down=False)
#     new_data = convert2onehot(data)
#     # print(data.head()) # top 5
#     print("\nNum of data: ", len(data), "\n")  # 1728

    # view data values
    # for name in data.keys():
    #     print(name, pd.unique(data[name]))
    # print("\n", new_data.head(2))
    # new_data.to_csv("../data/car/car_onehot.csv", index=False)